{ "cells": [ { "cell_type": "markdown", "id": "ae7c8606", "metadata": {}, "source": [ "# TAREA FINAL MACHINE LEARNING" ] }, { "cell_type": "markdown", "id": "6f7c38ca", "metadata": {}, "source": [ "## 1. Introducción\n", "### Objetivo del proyecto\n", "\n", "El objetivo de este proyecto es desarrollar un modelo de machine learning para predecir el estado funcional de pozos de agua en Tanzania, basado en datos históricos proporcionados en el desafío \"Pump it Up: Data Mining the Water Table\". Los pozos pueden clasificarse en tres categorías:\n", "\n", "- Funcional: El pozo opera correctamente.\n", "- No funcional: El pozo está fuera de servicio.\n", "- Necesita reparación: El pozo funciona, pero requiere mantenimiento." ] }, { "cell_type": "code", "execution_count": 205, "id": "7c19c29d", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import requests\n", "import re\n", "from datetime import datetime\n", "from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedKFold\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", "from sklearn.svm import LinearSVC, SVC\n", "from xgboost import XGBClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.preprocessing import MinMaxScaler, LabelEncoder\n", "from imblearn.over_sampling import SMOTE\n", "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, roc_curve\n", "from category_encoders import TargetEncoder\n", "import pickle\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", "import mlflow\n", "\n", "sns.set()\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "id": "316ca264", "metadata": {}, "source": [ "## 2. 
Cargar datos" ] }, { "cell_type": "code", "execution_count": null, "id": "bede1d19", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dimensiones del conjunto de entrenamiento: (59400, 40)\n", "Dimensiones de las etiquetas: (59400, 2)\n", "Dimensiones del conjunto combinado: (59400, 41)\n" ] }, { "data": { "text/html": [ "
| \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "funder | \n", "gps_height | \n", "installer | \n", "longitude | \n", "latitude | \n", "wpt_name | \n", "num_private | \n", "basin | \n", "subvillage | \n", "region | \n", "region_code | \n", "district_code | \n", "lga | \n", "ward | \n", "population | \n", "public_meeting | \n", "recorded_by | \n", "scheme_management | \n", "scheme_name | \n", "permit | \n", "construction_year | \n", "extraction_type | \n", "extraction_type_group | \n", "extraction_type_class | \n", "management | \n", "management_group | \n", "payment | \n", "payment_type | \n", "water_quality | \n", "quality_group | \n", "quantity | \n", "quantity_group | \n", "source | \n", "source_type | \n", "source_class | \n", "waterpoint_type | \n", "waterpoint_type_group | \n", "status_group | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "69572 | \n", "6000.0 | \n", "2011-03-14 | \n", "Roman | \n", "1390 | \n", "Roman | \n", "34.938093 | \n", "-9.856322 | \n", "none | \n", "0 | \n", "Lake Nyasa | \n", "Mnyusi B | \n", "Iringa | \n", "11 | \n", "5 | \n", "Ludewa | \n", "Mundindi | \n", "109 | \n", "True | \n", "GeoData Consultants Ltd | \n", "VWC | \n", "Roman | \n", "False | \n", "1999 | \n", "gravity | \n", "gravity | \n", "gravity | \n", "vwc | \n", "user-group | \n", "pay annually | \n", "annually | \n", "soft | \n", "good | \n", "enough | \n", "enough | \n", "spring | \n", "spring | \n", "groundwater | \n", "communal standpipe | \n", "communal standpipe | \n", "functional | \n", "
| 1 | \n", "8776 | \n", "0.0 | \n", "2013-03-06 | \n", "Grumeti | \n", "1399 | \n", "GRUMETI | \n", "34.698766 | \n", "-2.147466 | \n", "Zahanati | \n", "0 | \n", "Lake Victoria | \n", "Nyamara | \n", "Mara | \n", "20 | \n", "2 | \n", "Serengeti | \n", "Natta | \n", "280 | \n", "NaN | \n", "GeoData Consultants Ltd | \n", "Other | \n", "NaN | \n", "True | \n", "2010 | \n", "gravity | \n", "gravity | \n", "gravity | \n", "wug | \n", "user-group | \n", "never pay | \n", "never pay | \n", "soft | \n", "good | \n", "insufficient | \n", "insufficient | \n", "rainwater harvesting | \n", "rainwater harvesting | \n", "surface | \n", "communal standpipe | \n", "communal standpipe | \n", "functional | \n", "
| 2 | \n", "34310 | \n", "25.0 | \n", "2013-02-25 | \n", "Lottery Club | \n", "686 | \n", "World vision | \n", "37.460664 | \n", "-3.821329 | \n", "Kwa Mahundi | \n", "0 | \n", "Pangani | \n", "Majengo | \n", "Manyara | \n", "21 | \n", "4 | \n", "Simanjiro | \n", "Ngorika | \n", "250 | \n", "True | \n", "GeoData Consultants Ltd | \n", "VWC | \n", "Nyumba ya mungu pipe scheme | \n", "True | \n", "2009 | \n", "gravity | \n", "gravity | \n", "gravity | \n", "vwc | \n", "user-group | \n", "pay per bucket | \n", "per bucket | \n", "soft | \n", "good | \n", "enough | \n", "enough | \n", "dam | \n", "dam | \n", "surface | \n", "communal standpipe multiple | \n", "communal standpipe | \n", "functional | \n", "
| 3 | \n", "67743 | \n", "0.0 | \n", "2013-01-28 | \n", "Unicef | \n", "263 | \n", "UNICEF | \n", "38.486161 | \n", "-11.155298 | \n", "Zahanati Ya Nanyumbu | \n", "0 | \n", "Ruvuma / Southern Coast | \n", "Mahakamani | \n", "Mtwara | \n", "90 | \n", "63 | \n", "Nanyumbu | \n", "Nanyumbu | \n", "58 | \n", "True | \n", "GeoData Consultants Ltd | \n", "VWC | \n", "NaN | \n", "True | \n", "1986 | \n", "submersible | \n", "submersible | \n", "submersible | \n", "vwc | \n", "user-group | \n", "never pay | \n", "never pay | \n", "soft | \n", "good | \n", "dry | \n", "dry | \n", "machine dbh | \n", "borehole | \n", "groundwater | \n", "communal standpipe multiple | \n", "communal standpipe | \n", "non functional | \n", "
| 4 | \n", "19728 | \n", "0.0 | \n", "2011-07-13 | \n", "Action In A | \n", "0 | \n", "Artisan | \n", "31.130847 | \n", "-1.825359 | \n", "Shuleni | \n", "0 | \n", "Lake Victoria | \n", "Kyanyamisa | \n", "Kagera | \n", "18 | \n", "1 | \n", "Karagwe | \n", "Nyakasimbi | \n", "0 | \n", "True | \n", "GeoData Consultants Ltd | \n", "NaN | \n", "NaN | \n", "True | \n", "0 | \n", "gravity | \n", "gravity | \n", "gravity | \n", "other | \n", "other | \n", "never pay | \n", "never pay | \n", "soft | \n", "good | \n", "seasonal | \n", "seasonal | \n", "rainwater harvesting | \n", "rainwater harvesting | \n", "surface | \n", "communal standpipe | \n", "communal standpipe | \n", "functional | \n", "
| \n", " | \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "funder | \n", "gps_height | \n", "installer | \n", "longitude | \n", "latitude | \n", "wpt_name | \n", "num_private | \n", "basin | \n", "subvillage | \n", "region | \n", "region_code | \n", "district_code | \n", "lga | \n", "ward | \n", "population | \n", "public_meeting | \n", "recorded_by | \n", "scheme_name | \n", "permit | \n", "construction_year | \n", "extraction_type | \n", "extraction_type_group | \n", "extraction_type_class | \n", "payment | \n", "payment_type | \n", "water_quality | \n", "quality_group | \n", "quantity | \n", "quantity_group | \n", "source | \n", "source_type | \n", "source_class | \n", "waterpoint_type | \n", "waterpoint_type_group | \n", "status_group | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| management_group | \n", "management | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| commercial | \n", "company | \n", "685 | \n", "685 | \n", "685 | \n", "663 | \n", "685 | \n", "663 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "684 | \n", "685 | \n", "654 | \n", "658 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "685 | \n", "
| private operator | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1957 | \n", "1971 | \n", "1959 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1932 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1660 | \n", "1971 | \n", "986 | \n", "1893 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "1971 | \n", "|
| trust | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "77 | \n", "78 | \n", "26 | \n", "77 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "78 | \n", "|
| water authority | \n", "904 | \n", "904 | \n", "904 | \n", "836 | \n", "904 | \n", "836 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "895 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "878 | \n", "904 | \n", "623 | \n", "825 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "904 | \n", "|
| other | \n", "other | \n", "844 | \n", "844 | \n", "844 | \n", "837 | \n", "844 | \n", "831 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "839 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "689 | \n", "844 | \n", "270 | \n", "744 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "844 | \n", "
| other - school | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "0 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "99 | \n", "|
| parastatal | \n", "parastatal | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1624 | \n", "1768 | \n", "1626 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1518 | \n", "1768 | \n", "533 | \n", "1595 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "1768 | \n", "
| unknown | \n", "unknown | \n", "561 | \n", "561 | \n", "561 | \n", "533 | \n", "561 | \n", "527 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "301 | \n", "561 | \n", "240 | \n", "519 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "561 | \n", "
| user-group | \n", "vwc | \n", "40507 | \n", "40507 | \n", "40507 | \n", "37630 | \n", "40507 | \n", "37630 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40189 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "39208 | \n", "40507 | \n", "21845 | \n", "38296 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "40507 | \n", "
| water board | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2715 | \n", "2933 | \n", "2714 | \n", "2933 | \n", "2933 | \n", "2932 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2893 | \n", "2933 | \n", "2579 | \n", "2830 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "2933 | \n", "|
| wua | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2308 | \n", "2535 | \n", "2309 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2522 | \n", "2535 | \n", "2005 | \n", "2468 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "2535 | \n", "|
| wug | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6483 | \n", "6515 | \n", "6473 | \n", "6515 | \n", "6515 | \n", "6514 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "5537 | \n", "6515 | \n", "829 | \n", "6340 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "6515 | \n", "
| \n", " | \n", " | \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "funder | \n", "gps_height | \n", "installer | \n", "longitude | \n", "latitude | \n", "wpt_name | \n", "num_private | \n", "basin | \n", "subvillage | \n", "region | \n", "region_code | \n", "district_code | \n", "lga | \n", "ward | \n", "population | \n", "public_meeting | \n", "recorded_by | \n", "scheme_name | \n", "permit | \n", "construction_year | \n", "extraction_type | \n", "extraction_type_group | \n", "extraction_type_class | \n", "management | \n", "payment | \n", "payment_type | \n", "water_quality | \n", "quality_group | \n", "quantity | \n", "waterpoint_type | \n", "waterpoint_type_group | \n", "status_group | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| source_class | \n", "source_type | \n", "source | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| groundwater | \n", "borehole | \n", "hand dtw | \n", "874 | \n", "874 | \n", "874 | \n", "868 | \n", "874 | \n", "868 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "787 | \n", "874 | \n", "115 | \n", "869 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "874 | \n", "
| machine dbh | \n", "11075 | \n", "11075 | \n", "11075 | \n", "10251 | \n", "11075 | \n", "10246 | \n", "11075 | \n", "11075 | \n", "11074 | \n", "11075 | \n", "11075 | \n", "10849 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "10253 | \n", "11075 | \n", "5459 | \n", "10293 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "11075 | \n", "||
| shallow well | \n", "shallow well | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16301 | \n", "16824 | \n", "16286 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16817 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "15522 | \n", "16824 | \n", "608 | \n", "16253 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "16824 | \n", "|
| spring | \n", "spring | \n", "17021 | \n", "17021 | \n", "17021 | \n", "15870 | \n", "17021 | \n", "15870 | \n", "17021 | \n", "17021 | \n", "17020 | \n", "17021 | \n", "17021 | \n", "16886 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "16384 | \n", "17021 | \n", "14896 | \n", "15981 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "17021 | \n", "|
| surface | \n", "dam | \n", "dam | \n", "656 | \n", "656 | \n", "656 | \n", "647 | \n", "656 | \n", "646 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "614 | \n", "656 | \n", "474 | \n", "630 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "656 | \n", "
| rainwater harvesting | \n", "rainwater harvesting | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2099 | \n", "2295 | \n", "2096 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2293 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2089 | \n", "2295 | \n", "254 | \n", "2039 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "2295 | \n", "|
| river/lake | \n", "lake | \n", "765 | \n", "765 | \n", "765 | \n", "763 | \n", "765 | \n", "762 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "764 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "628 | \n", "765 | \n", "597 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "765 | \n", "|
| river | \n", "9612 | \n", "9612 | \n", "9612 | \n", "8715 | \n", "9612 | \n", "8721 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9543 | \n", "9612 | \n", "8019 | \n", "9248 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "9612 | \n", "||
| unknown | \n", "other | \n", "other | \n", "212 | \n", "212 | \n", "212 | \n", "204 | \n", "212 | \n", "204 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "201 | \n", "212 | \n", "143 | \n", "208 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "212 | \n", "
| unknown | \n", "66 | \n", "66 | \n", "66 | \n", "45 | \n", "66 | \n", "46 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "45 | \n", "66 | \n", "25 | \n", "58 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "66 | \n", "
| \n", " | \n", " | \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "funder | \n", "gps_height | \n", "installer | \n", "longitude | \n", "latitude | \n", "wpt_name | \n", "num_private | \n", "basin | \n", "subvillage | \n", "region | \n", "region_code | \n", "district_code | \n", "lga | \n", "ward | \n", "population | \n", "public_meeting | \n", "recorded_by | \n", "scheme_name | \n", "permit | \n", "construction_year | \n", "management | \n", "payment | \n", "water_quality | \n", "quantity | \n", "source | \n", "waterpoint_type | \n", "waterpoint_type_group | \n", "status_group | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| extraction_type_class | \n", "extraction_type_group | \n", "extraction_type | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| gravity | \n", "gravity | \n", "gravity | \n", "26780 | \n", "26780 | \n", "26780 | \n", "24704 | \n", "26780 | \n", "24714 | \n", "26780 | \n", "26780 | \n", "26779 | \n", "26780 | \n", "26780 | \n", "26646 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "25829 | \n", "26780 | \n", "21791 | \n", "25234 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "26780 | \n", "
| handpump | \n", "afridev | \n", "afridev | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1668 | \n", "1770 | \n", "1665 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1624 | \n", "1770 | \n", "191 | \n", "1660 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "1770 | \n", "
| india mark ii | \n", "india mark ii | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2358 | \n", "2400 | \n", "2358 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2297 | \n", "2400 | \n", "190 | \n", "2359 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "2400 | \n", "|
| india mark iii | \n", "india mark iii | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "88 | \n", "98 | \n", "3 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "98 | \n", "|
| nira/tanira | \n", "nira/tanira | \n", "8154 | \n", "8154 | \n", "8154 | \n", "7899 | \n", "8154 | \n", "7885 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8151 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "7478 | \n", "8154 | \n", "349 | \n", "7920 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "8154 | \n", "|
| other handpump | \n", "other - mkulima/shinyanga | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "0 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "|
| other - play pump | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "84 | \n", "85 | \n", "66 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "||
| other - swn 81 | \n", "229 | \n", "229 | \n", "229 | \n", "219 | \n", "229 | \n", "219 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "216 | \n", "229 | \n", "4 | \n", "221 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "229 | \n", "||
| walimi | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "46 | \n", "48 | \n", "0 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "48 | \n", "||
| swn 80 | \n", "swn 80 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3595 | \n", "3670 | \n", "3593 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3503 | \n", "3670 | \n", "111 | \n", "3655 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "3670 | \n", "|
| motorpump | \n", "mono | \n", "mono | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2577 | \n", "2865 | \n", "2578 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2748 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2760 | \n", "2865 | \n", "2215 | \n", "2582 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "2865 | \n", "
| other motorpump | \n", "cemo | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "89 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "90 | \n", "|
| climax | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "29 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "32 | \n", "||
| other | \n", "other | \n", "other | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6010 | \n", "6430 | \n", "6002 | \n", "6430 | \n", "6430 | \n", "6429 | \n", "6430 | \n", "6430 | \n", "6421 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "5980 | \n", "6430 | \n", "860 | \n", "6050 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "6430 | \n", "
| rope pump | \n", "rope pump | \n", "other - rope pump | \n", "451 | \n", "451 | \n", "451 | \n", "448 | \n", "451 | \n", "448 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "346 | \n", "451 | \n", "26 | \n", "349 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "451 | \n", "
| submersible | \n", "submersible | \n", "ksb | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1411 | \n", "1415 | \n", "1410 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1339 | \n", "1415 | \n", "1195 | \n", "1410 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "1415 | \n", "
| submersible | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4408 | \n", "4764 | \n", "4406 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4656 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4236 | \n", "4764 | \n", "3399 | \n", "4444 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "4764 | \n", "||
| wind-powered | \n", "wind-powered | \n", "windmill | \n", "117 | \n", "117 | \n", "117 | \n", "112 | \n", "117 | \n", "112 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "116 | \n", "117 | \n", "72 | \n", "105 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "117 | \n", "
| \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "gps_height | \n", "longitude | \n", "latitude | \n", "num_private | \n", "region_code | \n", "district_code | \n", "population | \n", "construction_year | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", "38691.000000 | \n", "38691.000000 | \n", "38691 | \n", "38691.000000 | \n", "38691.000000 | \n", "38691.000000 | \n", "38691.000000 | \n", "38691.000000 | \n", "38691.000000 | \n", "38691.000000 | \n", "38691.000000 | \n", "
| mean | \n", "37083.008736 | \n", "466.457534 | \n", "2012-05-25 15:54:09.406838784 | \n", "1002.367760 | \n", "35.983262 | \n", "-6.235372 | \n", "0.707710 | \n", "15.706495 | \n", "5.969786 | \n", "269.799617 | \n", "1996.814686 | \n", "
| min | \n", "1.000000 | \n", "0.000000 | \n", "2004-01-07 00:00:00 | \n", "-63.000000 | \n", "29.607122 | \n", "-11.649440 | \n", "0.000000 | \n", "2.000000 | \n", "1.000000 | \n", "0.000000 | \n", "1960.000000 | \n", "
| 25% | \n", "18489.500000 | \n", "0.000000 | \n", "2011-03-21 00:00:00 | \n", "372.000000 | \n", "34.676719 | \n", "-8.755274 | \n", "0.000000 | \n", "4.000000 | \n", "2.000000 | \n", "30.000000 | \n", "1987.000000 | \n", "
| 50% | \n", "37078.000000 | \n", "0.000000 | \n", "2013-01-23 00:00:00 | \n", "1154.000000 | \n", "36.648187 | \n", "-6.064216 | \n", "0.000000 | \n", "11.000000 | \n", "3.000000 | \n", "150.000000 | \n", "2000.000000 | \n", "
| 75% | \n", "55514.500000 | \n", "200.000000 | \n", "2013-02-23 00:00:00 | \n", "1488.000000 | \n", "37.803940 | \n", "-3.650661 | \n", "0.000000 | \n", "16.000000 | \n", "5.000000 | \n", "305.000000 | \n", "2008.000000 | \n", "
| max | \n", "74247.000000 | \n", "350000.000000 | \n", "2013-12-03 00:00:00 | \n", "2770.000000 | \n", "40.345193 | \n", "-1.042375 | \n", "1776.000000 | \n", "99.000000 | \n", "63.000000 | \n", "30500.000000 | \n", "2013.000000 | \n", "
| std | \n", "21420.922010 | \n", "3541.036030 | \n", "NaN | \n", "618.078669 | \n", "2.558709 | \n", "2.761317 | \n", "15.083957 | \n", "21.003006 | \n", "10.700673 | \n", "552.343746 | \n", "12.472045 | \n", "
| \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "funder | \n", "gps_height | \n", "installer | \n", "longitude | \n", "latitude | \n", "wpt_name | \n", "num_private | \n", "basin | \n", "subvillage | \n", "region | \n", "region_code | \n", "district_code | \n", "lga | \n", "ward | \n", "population | \n", "public_meeting | \n", "scheme_name | \n", "permit | \n", "construction_year | \n", "extraction_type_group | \n", "management | \n", "payment | \n", "water_quality | \n", "quantity | \n", "source | \n", "waterpoint_type | \n", "status_group | \n", "decade | \n", "installer_cat | \n", "funder_cat | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 21 | \n", "6091 | \n", "0.0 | \n", "2013-02-10 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Muungano | \n", "0 | \n", "Lake Victoria | \n", "Ibabachegu | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Ikungulyabashashi | \n", "0 | \n", "NaN | \n", "NaN | \n", "False | \n", "2000 | \n", "swn 80 | \n", "wug | \n", "unknown | \n", "unknown | \n", "unknown | \n", "shallow well | \n", "hand pump | \n", "functional | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
| 53 | \n", "32376 | \n", "0.0 | \n", "2011-08-01 | \n", "Government Of Tanzania | \n", "0 | \n", "Government | \n", "0.0 | \n", "-2.000000e-08 | \n", "Polisi | \n", "0 | \n", "Lake Victoria | \n", "Center | \n", "Mwanza | \n", "19 | \n", "6 | \n", "Geita | \n", "Nyang'hwale | \n", "0 | \n", "True | \n", "Nyang'hwale | \n", "True | \n", "2000 | \n", "submersible | \n", "vwc | \n", "unknown | \n", "unknown | \n", "dry | \n", "machine dbh | \n", "communal standpipe multiple | \n", "non functional | \n", "00s | \n", "Government | \n", "Government Of Tanzania | \n", "
| 168 | \n", "72678 | \n", "0.0 | \n", "2013-01-30 | \n", "Wvt | \n", "0 | \n", "WVT | \n", "0.0 | \n", "-2.000000e-08 | \n", "Wvt Tanzania | \n", "0 | \n", "Lake Victoria | \n", "Ilula | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Chinamili | \n", "0 | \n", "False | \n", "NaN | \n", "False | \n", "2000 | \n", "gravity | \n", "parastatal | \n", "other | \n", "soft | \n", "seasonal | \n", "rainwater harvesting | \n", "communal standpipe | \n", "functional | \n", "00s | \n", "Others | \n", "Others | \n", "
| 177 | \n", "56725 | \n", "0.0 | \n", "2013-01-17 | \n", "Netherlands | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Kikundi Cha Wakina Mama | \n", "0 | \n", "Lake Victoria | \n", "Mahaha | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Bunamhala | \n", "0 | \n", "NaN | \n", "NaN | \n", "False | \n", "2000 | \n", "other | \n", "wug | \n", "unknown | \n", "soft | \n", "enough | \n", "shallow well | \n", "other | \n", "non functional | \n", "00s | \n", "DWE | \n", "Others | \n", "
| 253 | \n", "13042 | \n", "0.0 | \n", "2012-10-29 | \n", "Hesawa | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Kwakisusi | \n", "0 | \n", "Lake Victoria | \n", "Nyamatala | \n", "Mwanza | \n", "19 | \n", "2 | \n", "Magu | \n", "Malili | \n", "0 | \n", "True | \n", "NaN | \n", "True | \n", "2000 | \n", "nira/tanira | \n", "vwc | \n", "never pay | \n", "soft | \n", "insufficient | \n", "shallow well | \n", "hand pump | \n", "functional needs repair | \n", "00s | \n", "DWE | \n", "Hesawa | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 59189 | \n", "62177 | \n", "0.0 | \n", "2011-07-18 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Wazazo | \n", "0 | \n", "Lake Victoria | \n", "Mwamabuli | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Mhunze | \n", "0 | \n", "True | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "other | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "functional needs repair | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
| 59208 | \n", "3631 | \n", "0.0 | \n", "2013-01-22 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Mtakuja | \n", "0 | \n", "Lake Victoria | \n", "Mbiti | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Kinang'weli | \n", "0 | \n", "NaN | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "unknown | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "functional | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
| 59295 | \n", "60843 | \n", "0.0 | \n", "2011-07-19 | \n", "Rwssp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Maendeleo | \n", "0 | \n", "Lake Victoria | \n", "Mwamalizi | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Chinamili | \n", "0 | \n", "True | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "never pay | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "functional needs repair | \n", "00s | \n", "DWE | \n", "Rwssp | \n", "
| 59324 | \n", "748 | \n", "0.0 | \n", "2013-01-22 | \n", "World Vision | \n", "0 | \n", "world vision | \n", "0.0 | \n", "-2.000000e-08 | \n", "Mwazwilo | \n", "0 | \n", "Lake Victoria | \n", "Mbita | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Mbita | \n", "0 | \n", "NaN | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "unknown | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "functional | \n", "00s | \n", "world vision | \n", "World Vision | \n", "
| 59374 | \n", "49651 | \n", "0.0 | \n", "2012-10-29 | \n", "Rwssp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Nguvu Kazi | \n", "0 | \n", "Lake Victoria | \n", "Mwamtani A | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Sagata | \n", "0 | \n", "True | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "other | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "functional | \n", "00s | \n", "DWE | \n", "Rwssp | \n", "
1812 rows × 33 columns
\n", "| \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "gps_height | \n", "longitude | \n", "latitude | \n", "num_private | \n", "region_code | \n", "district_code | \n", "population | \n", "construction_year | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", "57588.00000 | \n", "57588.000000 | \n", "57588 | \n", "57588.000000 | \n", "57588.000000 | \n", "57588.000000 | \n", "57588.000000 | \n", "57588.000000 | \n", "57588.000000 | \n", "57588.000000 | \n", "57588.000000 | \n", "
| mean | \n", "37106.48807 | \n", "327.645219 | \n", "2012-03-25 18:58:12.727651328 | \n", "689.325137 | \n", "35.149669 | \n", "-5.885572 | \n", "0.489060 | \n", "15.217615 | \n", "5.728311 | \n", "185.570831 | \n", "1997.859919 | \n", "
| min | \n", "0.00000 | \n", "0.000000 | \n", "2002-10-14 00:00:00 | \n", "-90.000000 | \n", "29.607122 | \n", "-11.649440 | \n", "0.000000 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1960.000000 | \n", "
| 25% | \n", "18522.75000 | \n", "0.000000 | \n", "2011-03-30 00:00:00 | \n", "0.000000 | \n", "33.285100 | \n", "-8.643841 | \n", "0.000000 | \n", "5.000000 | \n", "2.000000 | \n", "0.000000 | \n", "1995.000000 | \n", "
| 50% | \n", "37054.50000 | \n", "0.000000 | \n", "2012-10-08 00:00:00 | \n", "426.000000 | \n", "35.005943 | \n", "-5.172704 | \n", "0.000000 | \n", "12.000000 | \n", "3.000000 | \n", "35.000000 | \n", "2000.000000 | \n", "
| 75% | \n", "55667.25000 | \n", "30.000000 | \n", "2013-02-10 00:00:00 | \n", "1332.000000 | \n", "37.233712 | \n", "-3.372824 | \n", "0.000000 | \n", "17.000000 | \n", "5.000000 | \n", "230.000000 | \n", "2004.000000 | \n", "
| max | \n", "74247.00000 | \n", "350000.000000 | \n", "2013-12-03 00:00:00 | \n", "2770.000000 | \n", "40.345193 | \n", "-0.998464 | \n", "1776.000000 | \n", "99.000000 | \n", "80.000000 | \n", "30500.000000 | \n", "2013.000000 | \n", "
| std | \n", "21454.51421 | \n", "3043.831403 | \n", "NaN | \n", "693.564188 | \n", "2.607428 | \n", "2.809876 | \n", "12.426954 | \n", "17.855254 | \n", "9.760254 | \n", "477.744239 | \n", "10.331744 | \n", "
| \n", " | id | \n", "date_recorded | \n", "gps_height | \n", "longitude | \n", "latitude | \n", "num_private | \n", "district_code | \n", "population | \n", "construction_year | \n", "
|---|---|---|---|---|---|---|---|---|---|
| count | \n", "38019.000000 | \n", "38019 | \n", "38019.000000 | \n", "38019.000000 | \n", "38019.000000 | \n", "38019.000000 | \n", "38019.000000 | \n", "38019.000000 | \n", "38019.000000 | \n", "
| mean | \n", "37107.559115 | \n", "2012-05-29 21:01:34.026670848 | \n", "969.889634 | \n", "36.074387 | \n", "-6.139781 | \n", "0.740788 | \n", "6.299456 | \n", "281.087167 | \n", "1996.908283 | \n", "
| min | \n", "1.000000 | \n", "2004-01-07 00:00:00 | \n", "-90.000000 | \n", "29.607122 | \n", "-11.649440 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1960.000000 | \n", "
| 25% | \n", "18514.500000 | \n", "2011-03-22 00:00:00 | \n", "347.000000 | \n", "34.715340 | \n", "-8.388839 | \n", "0.000000 | \n", "2.000000 | \n", "40.000000 | \n", "1988.000000 | \n", "
| 50% | \n", "37128.000000 | \n", "2013-01-23 00:00:00 | \n", "1135.000000 | \n", "36.706815 | \n", "-5.750877 | \n", "0.000000 | \n", "3.000000 | \n", "150.000000 | \n", "2000.000000 | \n", "
| 75% | \n", "55505.500000 | \n", "2013-02-23 00:00:00 | \n", "1465.000000 | \n", "37.940149 | \n", "-3.597016 | \n", "0.000000 | \n", "5.000000 | \n", "324.000000 | \n", "2008.000000 | \n", "
| max | \n", "74247.000000 | \n", "2013-12-03 00:00:00 | \n", "2770.000000 | \n", "40.345193 | \n", "-1.042375 | \n", "1776.000000 | \n", "67.000000 | \n", "30500.000000 | \n", "2013.000000 | \n", "
| std | \n", "21406.803661 | \n", "NaN | \n", "612.544787 | \n", "2.586779 | \n", "2.737733 | \n", "15.288297 | \n", "11.303334 | \n", "564.687660 | \n", "12.425377 | \n", "
| \n", " | Unnamed: 0 | \n", "id | \n", "funder | \n", "gps_height | \n", "installer | \n", "longitude | \n", "latitude | \n", "basin | \n", "region | \n", "district_code | \n", "lga | \n", "ward | \n", "population | \n", "public_meeting | \n", "permit | \n", "construction_year | \n", "extraction_type_group | \n", "management | \n", "payment | \n", "water_quality | \n", "quantity | \n", "source | \n", "waterpoint_type | \n", "status_group | \n", "decade | \n", "installer_cat | \n", "funder_cat | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "69572 | \n", "Roman | \n", "1390 | \n", "Roman | \n", "34.938093 | \n", "-9.856322 | \n", "Lake Nyasa | \n", "Iringa | \n", "5 | \n", "Ludewa | \n", "Mundindi | \n", "109 | \n", "True | \n", "False | \n", "1999 | \n", "gravity | \n", "vwc | \n", "pay annually | \n", "soft | \n", "enough | \n", "spring | \n", "communal standpipe | \n", "0 | \n", "90s | \n", "Others | \n", "Others | \n", "
| 1 | \n", "1 | \n", "8776 | \n", "Grumeti | \n", "1399 | \n", "GRUMETI | \n", "34.698766 | \n", "-2.147466 | \n", "Lake Victoria | \n", "Mara | \n", "2 | \n", "Serengeti | \n", "Natta | \n", "280 | \n", "True | \n", "True | \n", "2010 | \n", "gravity | \n", "wug | \n", "never pay | \n", "soft | \n", "insufficient | \n", "rainwater harvesting | \n", "communal standpipe | \n", "0 | \n", "10s | \n", "Others | \n", "Others | \n", "
| 2 | \n", "2 | \n", "34310 | \n", "Lottery Club | \n", "686 | \n", "world vision | \n", "37.460664 | \n", "-3.821329 | \n", "Pangani | \n", "Manyara | \n", "4 | \n", "Simanjiro | \n", "Ngorika | \n", "250 | \n", "True | \n", "True | \n", "2009 | \n", "gravity | \n", "vwc | \n", "pay per bucket | \n", "soft | \n", "enough | \n", "dam | \n", "communal standpipe multiple | \n", "0 | \n", "00s | \n", "world vision | \n", "Others | \n", "
| 3 | \n", "3 | \n", "67743 | \n", "Unicef | \n", "263 | \n", "Unicef | \n", "38.486161 | \n", "-11.155298 | \n", "Ruvuma / Southern Coast | \n", "Mtwara | \n", "63 | \n", "Nanyumbu | \n", "Nanyumbu | \n", "58 | \n", "True | \n", "True | \n", "1986 | \n", "submersible | \n", "vwc | \n", "never pay | \n", "soft | \n", "dry | \n", "machine dbh | \n", "communal standpipe multiple | \n", "2 | \n", "80s | \n", "Others | \n", "Unicef | \n", "
| 4 | \n", "4 | \n", "19728 | \n", "Action In A | \n", "0 | \n", "Artisan | \n", "31.130847 | \n", "-1.825359 | \n", "Lake Victoria | \n", "Kagera | \n", "1 | \n", "Karagwe | \n", "Nyakasimbi | \n", "281 | \n", "True | \n", "True | \n", "2000 | \n", "gravity | \n", "other | \n", "never pay | \n", "soft | \n", "seasonal | \n", "rainwater harvesting | \n", "communal standpipe | \n", "0 | \n", "00s | \n", "Others | \n", "Others | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 59395 | \n", "59395 | \n", "60739 | \n", "Germany Republi | \n", "1210 | \n", "CES | \n", "37.169807 | \n", "-3.253847 | \n", "Pangani | \n", "Kilimanjaro | \n", "5 | \n", "Hai | \n", "Masama Magharibi | \n", "125 | \n", "True | \n", "True | \n", "1999 | \n", "gravity | \n", "water board | \n", "pay per bucket | \n", "soft | \n", "enough | \n", "spring | \n", "communal standpipe | \n", "0 | \n", "90s | \n", "Others | \n", "Germany Republi | \n", "
| 59396 | \n", "59396 | \n", "27263 | \n", "Cefa-njombe | \n", "1212 | \n", "Cefa | \n", "35.249991 | \n", "-9.070629 | \n", "Rufiji | \n", "Iringa | \n", "4 | \n", "Njombe | \n", "Ikondo | \n", "56 | \n", "True | \n", "True | \n", "1996 | \n", "gravity | \n", "vwc | \n", "pay annually | \n", "soft | \n", "enough | \n", "river | \n", "communal standpipe | \n", "0 | \n", "90s | \n", "Others | \n", "Others | \n", "
| 59397 | \n", "59397 | \n", "37057 | \n", "Unknown | \n", "0 | \n", "Unknown | \n", "34.017087 | \n", "-8.750434 | \n", "Rufiji | \n", "Mbeya | \n", "7 | \n", "Mbarali | \n", "Chimala | \n", "281 | \n", "True | \n", "False | \n", "2000 | \n", "swn 80 | \n", "vwc | \n", "pay monthly | \n", "fluoride | \n", "enough | \n", "machine dbh | \n", "hand pump | \n", "0 | \n", "00s | \n", "Unknown | \n", "Unknown | \n", "
| 59398 | \n", "59398 | \n", "31282 | \n", "Malec | \n", "0 | \n", "Musa | \n", "35.861315 | \n", "-6.378573 | \n", "Rufiji | \n", "Dodoma | \n", "4 | \n", "Chamwino | \n", "Mvumi Makulu | \n", "281 | \n", "True | \n", "True | \n", "2000 | \n", "nira/tanira | \n", "vwc | \n", "never pay | \n", "soft | \n", "insufficient | \n", "shallow well | \n", "hand pump | \n", "0 | \n", "00s | \n", "Others | \n", "Others | \n", "
| 59399 | \n", "59399 | \n", "26348 | \n", "World Bank | \n", "191 | \n", "World | \n", "38.104048 | \n", "-6.747464 | \n", "Wami / Ruvu | \n", "Morogoro | \n", "2 | \n", "Morogoro Rural | \n", "Ngerengere | \n", "150 | \n", "True | \n", "True | \n", "2002 | \n", "nira/tanira | \n", "vwc | \n", "pay when scheme fails | \n", "salty | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "0 | \n", "00s | \n", "Others | \n", "World Bank | \n", "
59400 rows × 27 columns
\n", "| \n", " | id | \n", "amount_tsh | \n", "date_recorded | \n", "funder | \n", "gps_height | \n", "installer | \n", "longitude | \n", "latitude | \n", "wpt_name | \n", "num_private | \n", "basin | \n", "subvillage | \n", "region | \n", "region_code | \n", "district_code | \n", "lga | \n", "ward | \n", "population | \n", "public_meeting | \n", "scheme_name | \n", "permit | \n", "construction_year | \n", "extraction_type_group | \n", "management | \n", "payment | \n", "water_quality | \n", "quantity | \n", "source | \n", "waterpoint_type | \n", "decade | \n", "installer_cat | \n", "funder_cat | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 28 | \n", "44718 | \n", "0.0 | \n", "2013-02-04 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Nkubanija | \n", "0 | \n", "Lake Victoria | \n", "Nkonze | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Lugulu | \n", "0 | \n", "True | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "pay when scheme fails | \n", "salty abandoned | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
| 55 | \n", "15962 | \n", "0.0 | \n", "2011-08-01 | \n", "Hesawa | \n", "0 | \n", "HESAWA | \n", "0.0 | \n", "-2.000000e-08 | \n", "Bombani | \n", "0 | \n", "Lake Victoria | \n", "Nyang'Hulukulu | \n", "Mwanza | \n", "19 | \n", "6 | \n", "Geita | \n", "Nyang'hwale | \n", "0 | \n", "True | \n", "Borehole | \n", "True | \n", "2000 | \n", "other | \n", "vwc | \n", "unknown | \n", "unknown | \n", "dry | \n", "machine dbh | \n", "other | \n", "00s | \n", "HESAWA | \n", "Hesawa | \n", "
| 68 | \n", "45374 | \n", "0.0 | \n", "2013-02-16 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Maendeleo | \n", "0 | \n", "Lake Victoria | \n", "Magua | \n", "Mwanza | \n", "17 | \n", "1 | \n", "Magu | \n", "Nkungulu | \n", "0 | \n", "NaN | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "unknown | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
| 139 | \n", "68616 | \n", "0.0 | \n", "2011-07-19 | \n", "Plan International | \n", "0 | \n", "Plan Internationa | \n", "0.0 | \n", "-2.000000e-08 | \n", "Mwatulole | \n", "0 | \n", "Lake Victoria | \n", "Ihayabuyaga | \n", "Mwanza | \n", "19 | \n", "6 | \n", "Geita | \n", "Kalangalala | \n", "0 | \n", "True | \n", "Borehole | \n", "True | \n", "2000 | \n", "india mark ii | \n", "vwc | \n", "never pay | \n", "soft | \n", "insufficient | \n", "machine dbh | \n", "hand pump | \n", "00s | \n", "Others | \n", "Others | \n", "
| 150 | \n", "51539 | \n", "0.0 | \n", "2013-02-09 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Mwashidakwa | \n", "0 | \n", "Lake Victoria | \n", "Magongolo | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Gamboshi | \n", "0 | \n", "NaN | \n", "NaN | \n", "False | \n", "2000 | \n", "nira/tanira | \n", "wug | \n", "unknown | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 14649 | \n", "30454 | \n", "0.0 | \n", "2011-08-08 | \n", "Nyamigogo | \n", "0 | \n", "MASWI | \n", "0.0 | \n", "-2.000000e-08 | \n", "Husein | \n", "0 | \n", "Lake Victoria | \n", "Nyaweshi | \n", "Mwanza | \n", "19 | \n", "6 | \n", "Geita | \n", "Kharumwa | \n", "0 | \n", "True | \n", "Borehole | \n", "True | \n", "2000 | \n", "india mark ii | \n", "vwc | \n", "never pay | \n", "soft | \n", "insufficient | \n", "shallow well | \n", "hand pump | \n", "00s | \n", "Others | \n", "Others | \n", "
| 14676 | \n", "44766 | \n", "0.0 | \n", "2013-02-16 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Imalanota | \n", "0 | \n", "Lake Victoria | \n", "Busumabwi | \n", "Mwanza | \n", "17 | \n", "1 | \n", "Magu | \n", "Nkungulu | \n", "0 | \n", "False | \n", "NaN | \n", "False | \n", "2000 | \n", "swn 80 | \n", "wug | \n", "other | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
| 14766 | \n", "56279 | \n", "0.0 | \n", "2012-11-04 | \n", "Government Of Tanzania | \n", "0 | \n", "RWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Ofisini | \n", "0 | \n", "Lake Victoria | \n", "Madukani | \n", "Mwanza | \n", "19 | \n", "2 | \n", "Magu | \n", "Mkula | \n", "0 | \n", "True | \n", "Kalemela Water Supply | \n", "True | \n", "2000 | \n", "gravity | \n", "vwc | \n", "never pay | \n", "soft | \n", "insufficient | \n", "lake | \n", "communal standpipe multiple | \n", "00s | \n", "RWE | \n", "Government Of Tanzania | \n", "
| 14831 | \n", "37994 | \n", "0.0 | \n", "2012-10-26 | \n", "Hesawa | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Kwa Masilili | \n", "0 | \n", "Lake Victoria | \n", "Solima A | \n", "Mwanza | \n", "19 | \n", "2 | \n", "Magu | \n", "Kabita | \n", "0 | \n", "True | \n", "NaN | \n", "True | \n", "2000 | \n", "nira/tanira | \n", "vwc | \n", "never pay | \n", "salty | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "00s | \n", "DWE | \n", "Hesawa | \n", "
| 14841 | \n", "64579 | \n", "0.0 | \n", "2012-10-26 | \n", "Dwsp | \n", "0 | \n", "DWE | \n", "0.0 | \n", "-2.000000e-08 | \n", "Iguna | \n", "0 | \n", "Lake Victoria | \n", "Nyerere | \n", "Shinyanga | \n", "17 | \n", "1 | \n", "Bariadi | \n", "Kasoli | \n", "0 | \n", "NaN | \n", "NaN | \n", "False | \n", "2000 | \n", "swn 80 | \n", "wug | \n", "unknown | \n", "soft | \n", "enough | \n", "shallow well | \n", "hand pump | \n", "00s | \n", "DWE | \n", "Dwsp | \n", "
457 rows × 32 columns
\n", "RandomForestClassifier(criterion='entropy', min_samples_leaf=2,\n",
" n_estimators=400, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomForestClassifier(criterion='entropy', min_samples_leaf=2,\n",
" n_estimators=400, random_state=42)VotingClassifier(estimators=[('rf',\n",
" RandomForestClassifier(criterion='entropy',\n",
" min_samples_leaf=2,\n",
" n_estimators=400,\n",
" random_state=42)),\n",
" ('xgb',\n",
" XGBClassifier(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None,\n",
" featu...\n",
" max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=None,\n",
" max_leaves=None,\n",
" min_child_weight=None, missing=nan,\n",
" monotone_constraints=None,\n",
" multi_strategy=None,\n",
" n_estimators=500, n_jobs=None,\n",
" num_parallel_tree=None, ...)),\n",
" ('lgb',\n",
" LGBMClassifier(class_weight='balanced',\n",
" max_depth=7, n_estimators=300,\n",
" num_class=3,\n",
" objective='multiclass',\n",
" random_state=42))],\n",
" n_jobs=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. VotingClassifier(estimators=[('rf',\n",
" RandomForestClassifier(criterion='entropy',\n",
" min_samples_leaf=2,\n",
" n_estimators=400,\n",
" random_state=42)),\n",
" ('xgb',\n",
" XGBClassifier(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None,\n",
" featu...\n",
" max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=None,\n",
" max_leaves=None,\n",
" min_child_weight=None, missing=nan,\n",
" monotone_constraints=None,\n",
" multi_strategy=None,\n",
" n_estimators=500, n_jobs=None,\n",
" num_parallel_tree=None, ...)),\n",
" ('lgb',\n",
" LGBMClassifier(class_weight='balanced',\n",
" max_depth=7, n_estimators=300,\n",
" num_class=3,\n",
" objective='multiclass',\n",
" random_state=42))],\n",
" n_jobs=-1)RandomForestClassifier(criterion='entropy', min_samples_leaf=2,\n",
" n_estimators=400, random_state=42)XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" feature_weights=None, gamma=None, grow_policy=None,\n",
" importance_type=None, interaction_constraints=None,\n",
" learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
" max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
" max_leaves=None, min_child_weight=None, missing=nan,\n",
" monotone_constraints=None, multi_strategy=None, n_estimators=500,\n",
" n_jobs=None, num_parallel_tree=None, ...)LGBMClassifier(class_weight='balanced', max_depth=7, n_estimators=300,\n",
" num_class=3, objective='multiclass', random_state=42)RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),\n",
" estimator=LGBMClassifier(class_weight='balanced',\n",
" max_depth=7, n_estimators=300,\n",
" num_class=3, objective='multiclass',\n",
" random_state=42),\n",
" n_iter=30, n_jobs=-1,\n",
" param_distributions={'learning_rate': [0.01, 0.1, 0.2],\n",
" 'max_depth': [5, 7, 10],\n",
" 'min_child_samples': [10, 20, 30],\n",
" 'n_estimators': [100, 200, 300],\n",
" 'num_leaves': [20, 31, 50]},\n",
" random_state=42, scoring='accuracy', verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),\n",
" estimator=LGBMClassifier(class_weight='balanced',\n",
" max_depth=7, n_estimators=300,\n",
" num_class=3, objective='multiclass',\n",
" random_state=42),\n",
" n_iter=30, n_jobs=-1,\n",
" param_distributions={'learning_rate': [0.01, 0.1, 0.2],\n",
" 'max_depth': [5, 7, 10],\n",
" 'min_child_samples': [10, 20, 30],\n",
" 'n_estimators': [100, 200, 300],\n",
" 'num_leaves': [20, 31, 50]},\n",
" random_state=42, scoring='accuracy', verbose=1)LGBMClassifier(class_weight='balanced', learning_rate=0.2, max_depth=7,\n",
" min_child_samples=30, n_estimators=300, num_class=3,\n",
" num_leaves=50, objective='multiclass', random_state=42)LGBMClassifier(class_weight='balanced', learning_rate=0.2, max_depth=7,\n",
" min_child_samples=30, n_estimators=300, num_class=3,\n",
" num_leaves=50, objective='multiclass', random_state=42)